In [222]:
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from textblob import TextBlob
import json
import json
import pandas as pd
from nltk.corpus import state_union
import pandas as pd
from textblob import TextBlob
#import neuralcoref
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.patches import Rectangle
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
Chart of alot of Entites, not final product, just checking (charts take time to be made)¶
In [234]:
letsgo = pd.read_csv("Data_Processing/entities_counted_Partied.csv")
letsgo
Out[234]:
| _Year_ | _Party_ | the United States | Congress | American | America | Americans | State | States | Constitution | ... | this House of Representatives | Paul Pelosi | Hello | the NATOization of Finland | the Warsaw Uprising | Moldovan | Sandu | Kherson | the Orange Revolution | the Heavenly Hundred | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1789 | None (Independent) | 3 | 0 | 2 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 1790 | None (Independent) | 22 | 3 | 0 | 0 | 0 | 1 | 1 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 1791 | None (Independent) | 14 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 1792 | None (Independent) | 4 | 4 | 0 | 0 | 0 | 0 | 3 | 4 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 1793 | None (Independent) | 18 | 6 | 0 | 0 | 0 | 0 | 2 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 230 | 2019 | Republican | 53 | 26 | 54 | 47 | 28 | 0 | 1 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 231 | 2020 | Republican | 80 | 47 | 212 | 138 | 123 | 0 | 1 | 19 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 232 | 2021 | Democrat | 54 | 25 | 213 | 177 | 136 | 3 | 0 | 6 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 233 | 2022 | Democrat | 46 | 12 | 60 | 91 | 45 | 1 | 0 | 11 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 234 | 2023 | Democrat | 8 | 11 | 26 | 39 | 19 | 0 | 0 | 0 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
235 rows × 16100 columns
In [235]:
# Assuming df is your DataFrame
# Initialize a Min-Max Scaler
scaler = MinMaxScaler()
letgos = letsgo.drop(columns=letsgo.columns[0])
letgos = letgos.drop(columns=letgos.columns[0])
# Scale the data
letgos_scaled = pd.DataFrame(scaler.fit_transform(letgos), columns=letgos.columns, index=letsgo["_Year_"])
# Create the heatmap
plt.figure(figsize=(50, 100))
sns.heatmap(letgos_scaled.T, annot=False, cmap='Blues')
plt.show()
Obviously the above Data Visulization is just testign around with the data¶
Chart 1¶
In [236]:
data = [
"Europe: 1037", "Germany: 386", "Hitler: 56", "Jews: 34",
"World War II: 157", "Ukraine: 137", "Turkey: 134"
]
# Splitting each string at ':' and taking the first part
cleaned_list_Europe = [(item.split(':')[0] )for item in data]
##
##
##
non_democrat_df = letsgo[letsgo['_Party_'] != 'Democrat']
non_Republican_df = letsgo[letsgo['_Party_'] != 'Republican']
import matplotlib.pyplot as plt
# Assuming non_democrat_df is your DataFrame
subs = cleaned_list_Europe
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_democrat_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Red')
plt.xlabel('Entites', fontsize=20)
plt.ylabel('Sum', fontsize=20)
plt.title('World War Entites Sum Non Democrat' , fontsize=20)
plt.xticks(rotation=45, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
# Add a caption
plt.figtext(0.5, -0.4, "The bar chart displays the frequency of specific entities mentioned in all Republicn President Speeches since US presidency specific to World Wars.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_Republican_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Blue')
plt.xlabel('Entites', fontsize=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation=45, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('World War Entites Sum Non Republican')
# Add a caption
plt.figtext(0.5, -0.4, "The bar chart displays the frequency of specific entities mentioned in all Democrat President Speeches since US presidency specific to World Wars.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
subs=non_democrat_df[cleaned_list_Europe]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Reds') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('World War Speech Entities Over The Years Non Democrat\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the frequency of specific entities mentioned in all Non-Democrat, including whips and non denomination parties, President Speeches since US presidency specific to World Wars in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
subs=non_Republican_df[cleaned_list_Europe]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Blues') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('World War Speech Entities Over The Years Non Republican\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the frequency of specific entities mentioned in all Non-Republican, including whips and non denomination parties, President Speeches since US presidency specific to World Wars in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
##
##
##
subs=letsgo[cleaned_list_Europe]
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Purples') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('World War Speech Entities Over The Years\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the frequency of specific entities mentioned in all President Speeches since US presidency specific to World Wars in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Assuming df is your DataFrame
# Initialize a Min-Max Scaler
scaler = MinMaxScaler()
# Scale the data
subs_scaled = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25,7))
sns.heatmap(subs_scaled.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('World War Speech Entities Over The Years MinMax Scaled\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the MinMax Scaled frequency of specific entities mentioned in all President Speeches since US presidency specific to World Wars in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Initialize a Standard Scaler
scaler = StandardScaler()
# Scale the data
letsgo_standardized = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25, 7))
sns.heatmap(letsgo_standardized.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('World War Speech Entities Over The Years Standardized\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the Standardized frequency of specific entities mentioned in all President Speeches since US presidency specific to World Wars in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2528904079.py:67: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2528904079.py:101: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2528904079.py:139: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
Chart 2¶
In [237]:
data = [
"Cuba: 618", "Panama: 261", "Mexico: 978", "Nicaragua: 224",
"Cuban: 174", "the Panama Canal: 73", "Porto Rico: 72"
]
# Splitting each string at ':' and taking the first part
cleaned_list_South_American = [(item.split(':')[0] )for item in data]
##
##
##
non_democrat_df = letsgo[letsgo['_Party_'] != 'Democrat']
non_Republican_df = letsgo[letsgo['_Party_'] != 'Republican']
import matplotlib.pyplot as plt
# Assuming non_democrat_df is your DataFrame
subs = cleaned_list_South_American
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_democrat_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Red')
plt.xlabel('Entities', fontsize=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation=75, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('South American Entites Sum Non Democrat', fontsize =20)
# Add a caption
plt.figtext(0.5, -0.6, "The bar chart displays the frequency of specific entities mentioned in all Republican President Speeches since US presidency specific to South American Involvement.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_Republican_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Blue')
plt.xlabel('Entities', fontsize=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation= 75, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('South American Entites Sum Non Republican', fontsize = 20)
# Add a caption
plt.figtext(0.5, -0.6, "The bar chart displays the frequency of specific entities mentioned in all Democrat President Speeches since US presidency specific to South American Involvement.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
subs=non_democrat_df[cleaned_list_South_American]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Reds') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('South American Speech Entities Over The Years Non Democrat\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the frequency of specific entities mentioned in all Non-Democratic, including whips and non denomination parties, President Speeches since US presidency specific to South American Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
subs=non_Republican_df[cleaned_list_South_American]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Blues') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('South American Speech Entities Over The Years Non Republican\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the frequency of specific entities mentioned in all Non-Republican, including whips and non denomination parties, President Speeches since US presidency specific to South American Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
##
##
##
subs=letsgo[cleaned_list_South_American]
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Purples') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('South American Speech Entities Over The Years\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the frequency of specific entities mentioned in all President Speeches since US presidency specific to South American Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Assuming df is your DataFrame
# Initialize a Min-Max Scaler
scaler = MinMaxScaler()
# Scale the data
subs_scaled = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25,7))
sns.heatmap(subs_scaled.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('South American Speech Entities Over The Years MinMax Scaled\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the MinMax Scaled frequency of specific entities mentioned in all President Speeches since US presidency specific to South American Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Initialize a Standard Scaler
scaler = StandardScaler()
# Scale the data
letsgo_standardized = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25, 7))
sns.heatmap(letsgo_standardized.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('South American Speech Entities Over The Years Standardized\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the Standardized frequency of specific entities mentioned in all President Speeches since US presidency specific to South American Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/3754587022.py:68: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/3754587022.py:102: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/3754587022.py:137: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
Chart 3¶
In [238]:
data = [
"the Soviet Union: 613", "Russia: 467", "Soviet: 450",
"Communist: 354", "Soviets: 177", "Communists: 175"
]
# Splitting each string at ':' and taking the first part
# Splitting each string at ':' and taking the first part
cleaned_list_russian = [item.split(':')[0] for item in data]
##
##
##
non_democrat_df = letsgo[letsgo['_Party_'] != 'Democrat']
non_Republican_df = letsgo[letsgo['_Party_'] != 'Republican']
import matplotlib.pyplot as plt
# Assuming non_democrat_df is your DataFrame
subs = cleaned_list_russian
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_democrat_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Red')
plt.xlabel('Entites', fontsize=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation=75, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('Russian Entites Sum Non Democrat', fontsize=20)
# Add a caption
plt.figtext(0.5, -0.55, "The bar chart displays the frequency of specific entities mentioned in all Republican President Speeches since US presidency specific to Russian Involvements.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_Republican_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Blue')
plt.xlabel('Entites', fontsize=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation=75, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('Russian Entites Sum Non Republican', fontsize= 20)
# Add a caption
plt.figtext(0.5, -0.55, "The bar chart displays the frequency of specific entities mentioned in all Democrat President Speeches since US presidency specific to Russian Involvement.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
subs=non_democrat_df[cleaned_list_russian]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Reds') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Russian Speech Entities Over The Years Non Democrat\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the frequency of specific entities mentioned in all Non-Democratic, including whips and non denomination parties, President Speeches since US presidency specific to Cold War Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
subs=non_Republican_df[cleaned_list_russian]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Blues') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Russian Speech Entities Over The Years Non Republican\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the frequency of specific entities mentioned in all Non-Republican, including whips and non denomination parties, President Speeches since US presidency specific to Cold War Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
##
##
##
subs=letsgo[cleaned_list_russian]
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Purples') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Russian Speech Entities Over The Years\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the frequency of specific entities mentioned in all President Speeches since US presidency specific to Russian Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Assuming df is your DataFrame
# Initialize a Min-Max Scaler
scaler = MinMaxScaler()
# Scale the data
subs_scaled = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25,7))
sns.heatmap(subs_scaled.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Russian Speech Entities Over The Years MinMax Scaled\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the MinMax Scaled frequency of specific entities mentioned in all President Speeches since US presidency specific to Russian Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Initialize a Standard Scaler
scaler = StandardScaler()
# Scale the data
letsgo_standardized = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25, 7))
sns.heatmap(letsgo_standardized.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Russian Speech Entities Over The Years Standardized\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.1, "The heatmap displays the Standardized frequency of specific entities mentioned in all President Speeches since US presidency specific to Russian Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/170823032.py:71: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/170823032.py:105: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/170823032.py:141: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
Chart 4¶
In [239]:
data = [
"Iraq: 590", "the Middle East: 363", "Afghanistan: 458", "Iran: 282",
"Iraqi: 191", "Taliban: 118", "Afghan: 112", "al Qaeda: 106",
"Arab: 99", "Saddam Hussein: 75", "Saddam Hussein's: 13",
"Iraqis: 75", "Somalia: 73", "Muslim: 48", "Al Qaeda: 45",
"Osama bin Laden: 30", "Afghans: 30", "Assad: 30"
]
# Splitting each string at ':' and taking the first part
# Splitting each string at ':' and taking the first part
cleaned_list_Middle_east = [item.split(':')[0] for item in data]
##
##
##
non_democrat_df = letsgo[letsgo['_Party_'] != 'Democrat']
non_Republican_df = letsgo[letsgo['_Party_'] != 'Republican']
import matplotlib.pyplot as plt
# Assuming non_democrat_df is your DataFrame
subs = cleaned_list_Middle_east
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_democrat_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Red')
plt.xlabel('Entites', fontsize=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation=90, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('Middle East Entites Sum Non Democrat', fontsize= 20)
# Add a caption
plt.figtext(0.5, -0.6, "The bar chart displays the frequency of specific entities mentioned in all Republican President Speeches since US presidency specific to Middle Eastern Involvement.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_Republican_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Blue')
plt.xlabel('Entites', fontsize=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation=90, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('Middle East Entites Sum Non Republican', fontsize= 20)
# Add a caption
plt.figtext(0.5, -0.6, "The bar chart displays the frequency of specific entities mentioned in all Democrat President Speeches since US presidency specific to Middle Eastern Involvement.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
subs=non_democrat_df[cleaned_list_Middle_east]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 10)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Reds') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Middle East Speech Entities Over The Years Non Democrat\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all Non-Democratic, including whips and non denomination parties, President Speeches since US presidency specific to Middle Eastern Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
subs=non_Republican_df[cleaned_list_Middle_east]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 10)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Blues') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Middle East Speech Entities Over The Years Non Republican\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all Non-Republican, including whips and non denomination parties, President Speeches since US presidency specific to Middle Eastern Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
##
##
##
subs=letsgo[cleaned_list_Middle_east]
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 10)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Purples') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Middle East Speech Entities Over The Years\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all President Speeches since US presidency specific to Middle East Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Assuming df is your DataFrame
# Initialize a Min-Max Scaler
scaler = MinMaxScaler()
# Scale the data
subs_scaled = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25,10))
sns.heatmap(subs_scaled.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Middle East Speech Entities Over The Years MinMax Scaled\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the MinMax Scaled frequency of specific entities mentioned in all President Speeches since US presidency specific to Middle East Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Initialize a Standard Scaler
scaler = StandardScaler()
# Scale the data
letsgo_standardized = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25, 10))
sns.heatmap(letsgo_standardized.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Middle East Speech Entities Over The Years Standardized\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the Standardized frequency of specific entities mentioned in all President Speeches since US presidency specific to Middle East Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/3291119174.py:75: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/3291119174.py:109: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/3291119174.py:145: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
Chart 5¶
In [240]:
data = [
"Vietnam: 473", "Japan: 387", "Viet-Nam: 145", "South Vietnam: 165",
"Korea: 174", "Philippines: 160", "Japanese: 157",
"South Vietnamese: 112", "South Viet-Nam: 111", "North Vietnam: 111",
"China: 841", "Chinese: 361"
]
# Splitting each string at ':' and taking the first part
# Splitting each string at ':' and taking the first part
cleaned_list_Asia = [item.split(':')[0] for item in data]
##
##
##
non_democrat_df = letsgo[letsgo['_Party_'] != 'Democrat']
non_Republican_df = letsgo[letsgo['_Party_'] != 'Republican']
import matplotlib.pyplot as plt
# Assuming non_democrat_df is your DataFrame
subs = cleaned_list_Asia
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_democrat_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Red')
plt.xlabel('Entites', fontsize=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation=90, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('Asian Entites Sum Non Democrat', fontsize=20)
# Add a caption
plt.figtext(0.5, -0.6, "The bar chart displays the frequency of specific entities mentioned in all Republican President Speeches since US presidency specific to Asian Involvement.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_Republican_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Blue')
plt.xlabel('Entites', fontsize=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation=90, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('Asian Entites Sum Non Republican', fontsize=20)
# Add a caption
plt.figtext(0.5, -0.6, "The bar chart displays the frequency of specific entities mentioned in all Democrat President Speeches since US presidency specific to Asian Involvement.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
subs=non_democrat_df[cleaned_list_Asia]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 10)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Reds') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Asia Speech Entities Over The Years Non Democrat\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all Non-Democratic, including whips and non denomination parties, President Speeches since US presidency specific to Asian Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
subs=non_Republican_df[cleaned_list_Asia]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 10)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Blues') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Asia Speech Entities Over The Years Non Republican\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all Non-Republican, including whips and non denomination parties, President Speeches since US presidency specific to Asian Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
##
##
##
subs=letsgo[cleaned_list_Asia]
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 10)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Purples') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Asia Speech Entities Over The Years\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all President Speeches since US presidency specific to Asian Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Assuming df is your DataFrame
# Initialize a Min-Max Scaler
scaler = MinMaxScaler()
# Scale the data
subs_scaled = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25, 10))
sns.heatmap(subs_scaled.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Asia Speech Entities Over The Years MinMax Scaled\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the MinMax Scaled frequency of specific entities mentioned in all President Speeches since US presidency specific to Asian Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Initialize a Standard Scaler
scaler = StandardScaler()
# Scale the data
letsgo_standardized = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25, 10))
sns.heatmap(letsgo_standardized.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Asia Speech Entities Over The Years Standardized\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the Standardized frequency of specific entities mentioned in all President Speeches since US presidency specific to Asian Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/1824117806.py:73: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/1824117806.py:107: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/1824117806.py:143: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
Chart 6¶
In [241]:
data = [
"Israel: 332", "Palestinians: 70", "Palestinian: 68",
"Israelis: 62", "Palestine: 26", "Israeli: 58", "Jews: 34"
]
# Splitting each string at ':' and taking the first part
cleaned_list_I_P = [item.split(':')[0] for item in data]
##
##
##
non_democrat_df = letsgo[letsgo['_Party_'] != 'Democrat']
non_Republican_df = letsgo[letsgo['_Party_'] != 'Republican']
import matplotlib.pyplot as plt
# Assuming non_democrat_df is your DataFrame
subs = cleaned_list_I_P
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_democrat_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Red')
plt.xlabel('Entites', fontsize=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation=45, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('I/P Entites Sum Non Democrat', fontsize=20)
# Add a caption
plt.figtext(0.5, -0.4, "The bar chart displays the frequency of specific entities mentioned in all Republican President Speeches since US presidency specific to Isreal and Palestinian Involvement.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_Republican_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Blue')
plt.xlabel('Entites', fontsize=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation=45, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('I/P Entites Sum Non Republican', fontsize=20)
# Add a caption
plt.figtext(0.5, -0.4, "The bar chart displays the frequency of specific entities mentioned in all Democrat President Speeches since US presidency specific to Israel and Palestinian Involvement.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
subs=non_democrat_df[cleaned_list_I_P]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Reds') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('I/P Speech Entities Over The Years Non Democrat\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all Non-Democratic, including whips and non denomination parties, President Speeches since US presidency specific to Israel and Palestinian Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
subs=non_Republican_df[cleaned_list_I_P]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Blues') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('I/P Speech Entities Over The Years Non Republican\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all Non-Republican, including whips and non denomination parties, President Speeches since US presidency specific to Israel and Palestinian Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
##
##
##
subs=letsgo[cleaned_list_I_P]
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Purples') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('I/P Speech Entities Over The Years\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all President Speeches since US presidency specific to Israel and Palestinian Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Assuming df is your DataFrame
# Initialize a Min-Max Scaler
scaler = MinMaxScaler()
# Scale the data
subs_scaled = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25,7))
sns.heatmap(subs_scaled.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('I/P Speech Entities Over The Years MinMax Scaled\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the MinMax Scaled frequency of specific entities mentioned in all President Speeches since US presidency specific to Israel and Palestinian Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Initialize a Standard Scaler
scaler = StandardScaler()
# Scale the data
letsgo_standardized = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25, 7))
sns.heatmap(letsgo_standardized.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('I/P Speech Entities Over The Years Standardized\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the Standardized frequency of specific entities mentioned in all President Speeches since US presidency specific to Israel and Palestinian Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/1824223548.py:69: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/1824223548.py:103: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/1824223548.py:139: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
Chart 7¶
In [242]:
data = [
"Negro: 84", "the Civil War: 56", "African American: 34",
"Negroes: 25", "Africans: 22", "African-American: 15",
"African-Americans: 15", "Native American: 15"
]
# Splitting each string at ':' and taking the first part
cleaned_list_Minority = [item.split(':')[0] for item in data]
##
##
##
non_democrat_df = letsgo[letsgo['_Party_'] != 'Democrat']
non_Republican_df = letsgo[letsgo['_Party_'] != 'Republican']
import matplotlib.pyplot as plt
# Assuming non_democrat_df is your DataFrame
subs = cleaned_list_Minority
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_democrat_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Red')
plt.xlabel('Entites', fontsize=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation=75, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('Minority Entites Sum Non Democrat', fontsize=20)
# Add a caption
plt.figtext(0.5, -0.6, "The bar chart displays the frequency of specific entities mentioned in all Republican President Speeches since US presidency specific to Minority Involvement.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_Republican_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Blue')
plt.xlabel('Entites', fontsize=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation=75, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('Minority Entites Sum Non Republican', fontsize=20)
# Add a caption
plt.figtext(0.5, -0.6, "The bar chart displays the frequency of specific entities mentioned in all Democrat President Speeches since US presidency specific to Minority Involvement.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
subs=non_democrat_df[cleaned_list_Minority]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Reds') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Minority Speech Entities Over The Years Non Democrat\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all Non-Democratic, including whips and non denomination parties, President Speeches since US presidency specific to Minority Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
subs=non_Republican_df[cleaned_list_Minority]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Blues') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Minority Speech Entities Over The Years Non Republican\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all Non-Republican, including whips and non denomination parties, President Speeches since US presidency specific to Minority Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
##
##
##
subs=letsgo[cleaned_list_Minority]
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 7)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Purples') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Minority Speech Entities Over The Years\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all President Speeches since US presidency specific to Minority Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Assuming df is your DataFrame
# Initialize a Min-Max Scaler
scaler = MinMaxScaler()
# Scale the data
subs_scaled = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25,7))
sns.heatmap(subs_scaled.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Minority Speech Entities Over The Years MinMax Scaled\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the MinMax Scaled frequency of specific entities mentioned in all President Speeches since US presidency specific to Minority Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Initialize a Standard Scaler
scaler = StandardScaler()
# Scale the data
letsgo_standardized = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25, 7))
sns.heatmap(letsgo_standardized.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Minority Speech Entities Over The Years Standardized\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the Standardized frequency of specific entities mentioned in all President Speeches since US presidency specific to Minority Involvement in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/1375633772.py:71: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/1375633772.py:105: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/1375633772.py:141: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
Chart 8¶
In [243]:
data = [
"the United Nations: 407", "Medicare: 252", "Ford: 190", "Kennedy: 276",
"Trump: 91", "Truman: 74", "Lincoln: 73", "Martin Luther King: 31",
"Bush: 163", "Reagan: 149", "Nixon: 147", "Pakistan: 81",
"Watergate: 73", "the Civil War: 56", "COVID-19: 62", "Great Britain: 679",
"British: 603"
]
# Splitting each string at ':' and taking the first part
cleaned_list_assorted = [item.split(':')[0] for item in data]
##
##
##
non_democrat_df = letsgo[letsgo['_Party_'] != 'Democrat']
non_Republican_df = letsgo[letsgo['_Party_'] != 'Republican']
import matplotlib.pyplot as plt
# Assuming non_democrat_df is your DataFrame
subs = cleaned_list_assorted
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_democrat_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Red')
plt.xlabel('Entites', fontsize=20, labelpad=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation=90, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('Other Entites Sum Non Democrat', fontsize=20)
# Add a caption
plt.figtext(0.5, -0.65, "The bar chart displays the frequency of specific entities mentioned in all Republican President Speeches since US presidency non-specific to any event or topic but that seemed of interest.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
# Calculate the sum of each column
sums = []
for column in subs:
sums.append(non_Republican_df[column].sum())
# Create a bar chart using the sums
plt.bar(subs, sums, color='Blue')
plt.xlabel('Entites', fontsize=20, labelpad=20 )
plt.ylabel('Sum', fontsize=20)
plt.xticks(rotation=90, fontsize=15) # Rotate the x labels for better readability
plt.yticks( fontsize=15)
plt.title('Other Entites Sum Non Republican', fontsize=20)
# Add a caption
plt.figtext(0.5, -0.65, "The bar chart displays the frequency of specific entities mentioned in all Democrat President Speeches since US presidency non-specific to any event or topic but that seemed of interest.", wrap=True, horizontalalignment='center', fontsize=13)
plt.show()
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
subs=non_democrat_df[cleaned_list_assorted]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 10)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Reds') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Other Speech Entities Over The Years Non Democrat\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all Non-Democratic, including whips and non denomination parties, President Speeches since US presidency non-specific to any event or topic but that seemed of interest, in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
subs=non_Republican_df[cleaned_list_assorted]
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 10)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Blues') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Other Speech Entities Over The Years Non Republican\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all Non-Democratic, including whips and non denomination parties, President Speeches since US presidency non-specific to any event or topic but that seemed of interest, in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
##
##
##
subs=letsgo[cleaned_list_assorted]
def map_to_interval(year):
return f"{(year // 10) * 10}-{(year // 10) * 10 + 9}"
# Assign years to intervals
subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
# Group by interval and sum
result_df = subs.groupby('Interval').sum()
result_df.index= result_df.index.astype(str).str[:4]
result_df.rename_axis("year", inplace=True)
# Set the size of the heatmap
plt.figure(figsize=(25, 10)) # You can adjust the size as needed
# Create the heatmap
sns.heatmap(result_df.T, annot=False, cmap='Purples') # 'annot=False' hides the data values, 'cmap' sets the color map
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Other Speech Entities Over The Years\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the frequency of specific entities mentioned in all President Speeches since US presidency non-specific to any event or topic but that seemed of interest, in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Assuming df is your DataFrame
# Initialize a Min-Max Scaler
scaler = MinMaxScaler()
# Scale the data
subs_scaled = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25, 10))
sns.heatmap(subs_scaled.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Other Speech Entities Over The Years MinMax Scaled\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the MinMax Scaled frequency of specific entities mentioned in all President Speeches since US presidency non-specific to any event or topic but that seemed of interest, in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
# Initialize a Standard Scaler
scaler = StandardScaler()
# Scale the data
letsgo_standardized = pd.DataFrame(scaler.fit_transform(result_df), columns=result_df.columns, index=result_df.index)
# Create the heatmap
plt.figure(figsize=(25, 10))
sns.heatmap(letsgo_standardized.T, annot=False, cmap='Purples')
plt.xlabel('Year', fontsize = 20)
plt.yticks(fontsize = 20, rotation = 360)
plt.xticks(fontsize = 17)
plt.title('Other Speech Entities Over The Years Standardized\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.05, "The heatmap displays the Standardized frequency of specific entities mentioned in all President Speeches since US presidency non-specific to any event or topic but that seemed of interest, in a temporal dimension cut up by decades.", wrap=True, horizontalalignment='center', fontsize=20)
#plt.savefig('Speech_heatmap.png');
plt.show()
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2766536643.py:74: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2766536643.py:108: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2766536643.py:144: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subs['Interval'] = letsgo["_Year_"].astype(int).map(map_to_interval)
Case Studies of Entites mentioned compared from President to President¶
Case Study 1 World War II Entites¶
In [244]:
samp_sorted= pd.read_csv('Data_Processing/outputfinal_filename.csv')
startyear=1939
endyear=1945
startDate=19390901 # Invasion of Poland, attack on Poland by Nazi Germany that marked the start of World War II. The invasion lasted from September 1 to October 5, 1939. As dawn broke on September 1, 1939, German forces launched a surprise attack on Poland.
endDate=19450814 # US President Harry S. Truman announced Japan's surrender and the end of World War II
gents = [
"Europe", "Germany", "Hitler", "Jews",
"World War II", 'Navy', 'Army', 'Poland', 'Japan', 'Japanese'
]
bummy = samp_sorted.loc[(samp_sorted['Year'] >= startyear) & (samp_sorted['Year'] <= endyear)]
bummy.reset_index(inplace=True)
bummy.drop('index', axis=1, inplace=True)
df1 = pd.DataFrame(columns=['doc_name', 'date', 'transcript', 'president', 'title', 'Year'])
for i in range(len(bummy)):
date_string = bummy.loc[bummy.index[i], 'date'] # Accessing the 'date' column for each row
cleaned_string = date_string.replace('-', '')
# Convert string to number
result = int(cleaned_string)
if startDate <= result <= endDate:
# Assuming df1 is the DataFrame you want to add a row to, and df2 is the DataFrame from which you want to add the row
row_to_add = bummy.loc[i] # Select the row you want to add from df2
df1.loc[len(df1)] = row_to_add.values
# Ensure the 'date' column is in datetime format
df1['date'] = pd.to_datetime(df1['date'])
# Create a DataFrame with each president's first and last speech date
president_dates = df1.groupby('president')['date'].agg(['min', 'max']).reset_index()
president_dates.columns = ['president', 'first_speech', 'last_speech']
# Print the new DataFrame
print(president_dates)
# Create an empty DataFrame to store the counts
fumpy = pd.DataFrame(columns=['president'] + gents)
# Iterate over each president in df1
for president in df1['president'].unique():
# Initialize a dictionary to store the counts for each president
president_counts = {'president': president}
# Filter df1 for speeches by the current president
president_df = df1[df1['president'] == president]
# Iterate over each word in gents
for word in gents:
# Count the occurrences of the word in the current president's speeches
word_count = sum(president_df['transcript'].str.count(word))
# Add the count to the dictionary
president_counts[word] = word_count
# Append the counts for the current president to fumpy
fumpy = fumpy.append(president_counts, ignore_index=True)
# Set the 'president' column as the index
fumpy.set_index('president', inplace=True)
# Calculate the sum of each column and sort the columns by the sum in descending order
fumpy = fumpy.loc[:, fumpy.sum().sort_values(ascending=False).index]
# Print the DataFrame fumpy
print(fumpy)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/1897943000.py:26: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
bummy.drop('index', axis=1, inplace=True)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/1897943000.py:80: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
fumpy = fumpy.append(president_counts, ignore_index=True)
president first_speech last_speech
0 Franklin D. Roosevelt 1939-09-03 1945-03-01
1 Harry S. Truman 1945-04-16 1945-08-09
Japan Germany Europe Japanese Hitler Poland Army Navy \
president
Franklin D. Roosevelt 149 107 92 86 55 38 51 49
Harry S. Truman 19 23 15 10 2 17 3 0
Jews World War II
president
Franklin D. Roosevelt 0 0
Harry S. Truman 0 0
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/1897943000.py:80: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True)
In [245]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.patches import Rectangle
from matplotlib.patches import Patch
# Fill missing values with zeros
fumpy_filled = fumpy.fillna(0)
# Convert fumpy to numeric type
fumpy_numeric = fumpy_filled.apply(pd.to_numeric)
# Set the size of the heatmap
plt.figure(figsize=(15, 7)) # You can adjust the size as needed
# Create the transposed DataFrame
fumpy_transposed = fumpy_numeric.T
# Create the heatmap without annotations first
sns.heatmap(fumpy_transposed, annot=False, cmap='Blues')
# Annotate each cell with the corresponding value
for i in range(len(fumpy_transposed.index)):
for j in range(len(fumpy_transposed.columns)):
plt.text(j + 0.5, i + 0.5, fumpy_transposed.iloc[i, j], ha='center', va='center', color='red', fontsize=17)
# Set custom x-axis tick colors
x_labels = fumpy_transposed.columns
x_colors = ['red' if label == 'Franklin D. Roosevelt' else 'blue' for label in x_labels]
# Map the start and end dates for each president
date_mapping = {
row['president']: (row['first_speech'].strftime('%Y-%m-%d'), row['last_speech'].strftime('%Y-%m-%d'))
for _, row in president_dates.iterrows()
}
# Use plt.text to position the x-tick labels manually below the heatmap
y_pos_below = 10.5 # Adjust as needed to position labels below the heatmap
for idx, (label, color) in enumerate(zip(x_labels, x_colors)):
plt.text(idx + 0.5, y_pos_below, label, ha='center', va='center', fontsize=20, color=color)
start_date, end_date = date_mapping[label]
plt.text(idx + 0.5, y_pos_below + 0.8, f'{start_date} - {end_date}', ha='center', va='center', fontsize=15, color='black')
legend_elements = [
Patch(facecolor='blue', edgecolor='blue', label='Democrat'),
Patch(facecolor='red', edgecolor='red', label='Republican')
]
plt.legend(handles=legend_elements,
loc='lower left',
bbox_to_anchor=(-0.3, -0.2), # Coordinates for 'bbox_to_anchor' are relative to the axes
title="Fontcolor of Presidents",
fontsize='small', # Adjust text size
title_fontsize='medium', # Adjust title font size
frameon=True, # Toggle the frame
shadow=True) # Add shadow for better visibility
plt.xticks(ticks=[], labels=[]) # Remove default xticks to avoid overlap
# Set y-axis tick labels (word list)
plt.yticks(ticks=range(len(fumpy_transposed.index)), labels=fumpy_transposed.index, fontsize=20)
plt.xlabel('President', fontsize=20, labelpad=65)
plt.ylabel('Entities', fontsize=20)
plt.title(f'World War II Entity Frequency by Presidents \n {startyear} - {endyear} \n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.2, "The heatmap displays the frequency of specific entities mentioned in President Speeches during the signifiacnt years of World War II, with annotations indicating the start and end dates of of presidents in office during that time period, which started with Invasion of Poland by Nazi Germany that marked the start of World War II and ended with the US President Harry S. Truman announcement of Japan's surrender. Red labels highlight Republican Presidents and Blue higlights Depmocraric Presidents.", wrap=True, horizontalalignment='center', fontsize=15)
plt.show()
# Initialize a StandardScaler
scaler = StandardScaler()
# Scale the data
fumpy_standardized = pd.DataFrame(scaler.fit_transform(fumpy_transposed), columns=fumpy_transposed.columns, index=fumpy_transposed.index)
# Set the size of the heatmap
plt.figure(figsize=(15, 7)) # You can adjust the size as needed
# Create the transposed DataFrame
fumpy_transposed = fumpy_numeric.T
# Create the heatmap without annotations first
sns.heatmap(fumpy_standardized, annot=False, cmap='Blues')
# Annotate each cell with the corresponding value
for i in range(len(fumpy_transposed.index)):
for j in range(len(fumpy_transposed.columns)):
plt.text(j + 0.5, i + 0.5, fumpy_transposed.iloc[i, j], ha='center', va='center', color='red', fontsize=17)
# Set custom x-axis tick colors
x_labels = fumpy_transposed.columns
x_colors = ['red' if label == 'Franklin D. Roosevelt' else 'blue' for label in x_labels]
# Map the start and end dates for each president
date_mapping = {
row['president']: (row['first_speech'].strftime('%Y-%m-%d'), row['last_speech'].strftime('%Y-%m-%d'))
for _, row in president_dates.iterrows()
}
# Use plt.text to position the x-tick labels manually below the heatmap
y_pos_below = 10.5 # Adjust as needed to position labels below the heatmap
for idx, (label, color) in enumerate(zip(x_labels, x_colors)):
plt.text(idx + 0.5, y_pos_below, label, ha='center', va='center', fontsize=20, color=color)
start_date, end_date = date_mapping[label]
plt.text(idx + 0.5, y_pos_below + 0.8, f'{start_date} - {end_date}', ha='center', va='center', fontsize=15, color='black')
legend_elements = [
Patch(facecolor='blue', edgecolor='blue', label='Democrat'),
Patch(facecolor='red', edgecolor='red', label='Republican')
]
plt.legend(handles=legend_elements,
loc='lower left',
bbox_to_anchor=(-0.3, -0.2), # Coordinates for 'bbox_to_anchor' are relative to the axes
title="Fontcolor of Presidents",
fontsize='small', # Adjust text size
title_fontsize='medium', # Adjust title font size
frameon=True, # Toggle the frame
shadow=True) # Add shadow for better visibility
plt.xticks(ticks=[], labels=[]) # Remove default xticks to avoid overlap
# Set y-axis tick labels (word list)
plt.yticks(ticks=range(len(fumpy_transposed.index)), labels=fumpy_transposed.index, fontsize=20)
plt.xlabel('President', fontsize=20, labelpad=65)
plt.ylabel('Entities', fontsize=20)
plt.title(f'World War II Entity Frequency by Presidents \n {startyear} - {endyear} Standardized \n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.2, "The heatmap displays the Standardized frequency of specific entities mentioned in President Speeches during the signifiacnt years of World War II, with annotations indicating the start and end dates of of presidents in office during that time period, which started with Invasion of Poland by Nazi Germany that marked the start of World War II and ended with the US President Harry S. Truman announcement of Japan's surrender. Red labels highlight Republican Presidents and Blue higlights Depmocraric Presidents.", wrap=True, horizontalalignment='center', fontsize=15)
plt.show()
# Initialize a MinMaxScaler
scaler = MinMaxScaler()
# Scale the data
fumpy_scaled = pd.DataFrame(scaler.fit_transform(fumpy_transposed), columns=fumpy_transposed.columns, index=fumpy_transposed.index)
# Set the size of the heatmap
plt.figure(figsize=(15, 7)) # You can adjust the size as needed
# Create the transposed DataFrame
fumpy_transposed = fumpy_numeric.T
# Create the heatmap without annotations first
sns.heatmap(fumpy_scaled, annot=False, cmap='Blues')
# Annotate each cell with the corresponding value
for i in range(len(fumpy_transposed.index)):
for j in range(len(fumpy_transposed.columns)):
plt.text(j + 0.5, i + 0.5, fumpy_transposed.iloc[i, j], ha='center', va='center', color='red', fontsize=17)
# Set custom x-axis tick colors
x_labels = fumpy_transposed.columns
x_colors = ['red' if label == 'Franklin D. Roosevelt' else 'blue' for label in x_labels]
# Map the start and end dates for each president
date_mapping = {
row['president']: (row['first_speech'].strftime('%Y-%m-%d'), row['last_speech'].strftime('%Y-%m-%d'))
for _, row in president_dates.iterrows()
}
# Use plt.text to position the x-tick labels manually below the heatmap
y_pos_below = 10.5 # Adjust as needed to position labels below the heatmap
for idx, (label, color) in enumerate(zip(x_labels, x_colors)):
plt.text(idx + 0.5, y_pos_below, label, ha='center', va='center', fontsize=20, color=color)
start_date, end_date = date_mapping[label]
plt.text(idx + 0.5, y_pos_below + 0.8, f'{start_date} - {end_date}', ha='center', va='center', fontsize=15, color='black')
legend_elements = [
Patch(facecolor='blue', edgecolor='blue', label='Democrat'),
Patch(facecolor='red', edgecolor='red', label='Republican')
]
plt.legend(handles=legend_elements,
loc='lower left',
bbox_to_anchor=(-0.3, -0.2), # Coordinates for 'bbox_to_anchor' are relative to the axes
title="Fontcolor of Presidents",
fontsize='small', # Adjust text size
title_fontsize='medium', # Adjust title font size
frameon=True, # Toggle the frame
shadow=True) # Add shadow for better visibility
plt.xticks(ticks=[], labels=[]) # Remove default xticks to avoid overlap
# Set y-axis tick labels (word list)
plt.yticks(ticks=range(len(fumpy_transposed.index)), labels=fumpy_transposed.index, fontsize=20)
plt.xlabel('President', fontsize=20, labelpad=65)
plt.ylabel('Entities', fontsize=20)
plt.title(f'World War II Entity Frequency by Presidents \n {startyear} - {endyear} MinMax Scaled \n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.2, "The heatmap displays the MinMax Scaled frequency of specific entities mentioned in President Speeches during the signifiacnt years of World War II, with annotations indicating the start and end dates of of presidents in office during that time period, which started with Invasion of Poland by Nazi Germany that marked the start of World War II and ended with the US President Harry S. Truman announcement of Japan's surrender. Red labels highlight Republican Presidents and Blue higlights Depmocraric Presidents.", wrap=True, horizontalalignment='center', fontsize=15)
plt.show()
Case Study 2 Cold War Entites¶
In [246]:
samp_sorted= pd.read_csv('Data_Processing/outputfinal_filename.csv')
startyear=1947
endyear=1991
startDate=19470312 # The Cold War began with the announcement of the Truman Doctrine in 1947, started a gradual winding down with the Sino-Soviet split between the Soviets and the People's Republic of China in 1961, and ended with the collapse of the Soviet Union in 1991.
endDate=19911225 # Gorbachev resigned on 25 December 1991 and what was left of the Soviet parliament voted to end itself. the Soviet Union itself dissolved into its component republics. Sign of the end of the cold war.
gents = [
"the Soviet Union", "Russia", "Soviet",
"Communist", "Soviets", "Communists", 'Army', 'Navy'
]
bummy = samp_sorted.loc[(samp_sorted['Year'] >= startyear) & (samp_sorted['Year'] <= endyear)]
bummy.reset_index(inplace=True)
bummy.drop('index', axis=1, inplace=True)
df1 = pd.DataFrame(columns=['doc_name', 'date', 'transcript', 'president', 'title', 'Year'])
prev=startDate
for i in range(len(bummy)):
date_string = bummy.loc[bummy.index[i], 'date'] # Accessing the 'date' column for each row
cleaned_string = date_string.replace('-', '')
# Convert string to number
if 'T' in cleaned_string:
cleaned_string=cleaned_string.split('T')[0]
result = int(cleaned_string)
if result<9999:
original_str = str(prev)
bluuuh = str(result)
# Cut off the last four characters
last_four = original_str[-4:]
# Combine the remaining string with the new string
res = bluuuh + last_four
result =int(res)
prev=result
fumm=str(result)
summmy=fumm[:4] + '-' + fumm[4:6] + '-' + fumm[6:]
bummy.loc[bummy.index[i], 'date'] = summmy
if startDate <= result <= endDate:
# Assuming df1 is the DataFrame you want to add a row to, and df2 is the DataFrame from which you want to add the row
row_to_add = bummy.loc[i] # Select the row you want to add from df2
df1.loc[len(df1)] = row_to_add.values
# Ensure the 'date' column is in datetime format
#df1['date'] = pd.to_datetime(df1['date'])
# Ensure the 'date' column is in datetime format
df1['date'] = pd.to_datetime(df1['date'])
# Create a DataFrame with each president's first and last speech date
president_dates = df1.groupby('president')['date'].agg(['min', 'max']).reset_index()
president_dates.columns = ['president', 'first_speech', 'last_speech']
# Print the new DataFrame
print(president_dates)
# Create an empty DataFrame to store the counts
fumpy = pd.DataFrame(columns=['president'] + gents)
# Iterate over each president in df1
for president in df1['president'].unique():
# Initialize a dictionary to store the counts for each president
president_counts = {'president': president}
# Filter df1 for speeches by the current president
president_df = df1[df1['president'] == president]
# Iterate over each word in gents
for word in gents:
# Count the occurrences of the word in the current president's speeches
word_count = sum(president_df['transcript'].str.count(word))
# Add the count to the dictionary
president_counts[word] = word_count
# Append the counts for the current president to fumpy
fumpy = fumpy.append(president_counts, ignore_index=True)
# Set the 'president' column as the index
fumpy.set_index('president', inplace=True)
# Calculate the sum of each column and sort the columns by the sum in descending order
fumpy = fumpy.loc[:, fumpy.sum().sort_values(ascending=False).index]
republican_presidents_df = pd.DataFrame({
"Name": [
"Dwight D. Eisenhower",
"Richard M. Nixon",
"Gerald Ford",
"Ronald Reagan",
"George H. W. Bush"
]
})
# Print the DataFrame fumpy
print(fumpy)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/71674071.py:26: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
bummy.drop('index', axis=1, inplace=True)
president first_speech last_speech
0 Dwight D. Eisenhower 1953-01-20 1961-01-17
1 George H. W. Bush 1988-08-18 1991-07-31
2 Gerald Ford 1974-08-09 1977-01-12
3 Harry S. Truman 1947-03-12 1953-01-15
4 Jimmy Carter 1976-09-23 1981-01-14
5 John F. Kennedy 1960-07-15 1963-10-26
6 Lyndon B. Johnson 1963-05-30 1969-01-14
7 Richard M. Nixon 1952-09-23 1974-08-09
8 Ronald Reagan 1964-10-27 1989-01-11
Soviet the Soviet Union Communist Communists Soviets \
president
Harry S. Truman 30 12 45 17 0
Richard M. Nixon 43 25 45 15 5
Dwight D. Eisenhower 103 34 51 13 17
John F. Kennedy 159 85 178 79 17
Lyndon B. Johnson 120 70 182 47 5
Ronald Reagan 514 182 29 4 106
Gerald Ford 21 18 3 0 0
Jimmy Carter 161 95 14 3 12
George H. W. Bush 137 71 2 0 11
Russia Army Navy
president
Harry S. Truman 7 2 1
Richard M. Nixon 0 2 0
Dwight D. Eisenhower 14 4 7
John F. Kennedy 17 12 3
Lyndon B. Johnson 30 24 9
Ronald Reagan 23 7 8
Gerald Ford 0 2 2
Jimmy Carter 15 5 8
George H. W. Bush 2 4 1
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/71674071.py:116: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/71674071.py:116: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/71674071.py:116: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/71674071.py:116: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/71674071.py:116: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/71674071.py:116: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/71674071.py:116: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/71674071.py:116: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/71674071.py:116: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True)
In [247]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.patches import Rectangle
# Fill missing values with zeros
fumpy_filled = fumpy.fillna(0)
# Convert fumpy to numeric type
fumpy_numeric = fumpy_filled.apply(pd.to_numeric)
# Set the size of the heatmap
plt.figure(figsize=(15, 7)) # You can adjust the size as needed
# Create the transposed DataFrame
fumpy_transposed = fumpy_numeric.T
# Create the heatmap without annotations first
sns.heatmap(fumpy_transposed, annot=False, cmap='Blues')
# Annotate each cell with the corresponding value
for i in range(len(fumpy_transposed.index)):
for j in range(len(fumpy_transposed.columns)):
plt.text(j + 0.5, i + 0.5, fumpy_transposed.iloc[i, j], ha='center', va='center', color='red', fontsize=17)
# Set custom x-axis tick colors
x_labels = fumpy_transposed.columns
x_colors = ['red' if label in republican_presidents_df['Name'].values else 'blue' for label in x_labels]
# Map the start and end dates for each president
date_mapping = {
row['president']: (row['first_speech'].strftime('%Y-%m-%d'), row['last_speech'].strftime('%Y-%m-%d'))
for _, row in president_dates.iterrows()
}
# Use plt.text to position the x-tick labels manually below the heatmap
y_pos_below = 9 # Adjust as needed to position labels below the heatmap
for idx, (label, color) in enumerate(zip(x_labels, x_colors)):
plt.text(idx , y_pos_below, label, ha='center', va='center', fontsize=15, color=color, rotation=40)
start_date, end_date = date_mapping[label]
plt.text(idx+ .2, y_pos_below + .2, f'{start_date} - {end_date}', ha='center', va='center', fontsize=12, color='black', rotation=40)
legend_elements = [
Patch(facecolor='blue', edgecolor='blue', label='Democrat'),
Patch(facecolor='red', edgecolor='red', label='Republican')
]
plt.legend(handles=legend_elements,
loc='lower left',
bbox_to_anchor=(-0.3, -0.2), # Coordinates for 'bbox_to_anchor' are relative to the axes
title="Fontcolor of Presidents",
fontsize='small', # Adjust text size
title_fontsize='medium', # Adjust title font size
frameon=True, # Toggle the frame
shadow=True) # Add shadow for better visibility
plt.xticks(ticks=[], labels=[]) # Remove default xticks to avoid overlap
# Set y-axis tick labels (word list)
plt.yticks(ticks=range(len(fumpy_transposed.index)), labels=fumpy_transposed.index, fontsize=20)
plt.xlabel('President', fontsize=20, labelpad=130)
plt.ylabel('Entities', fontsize=20)
plt.title(f'Cold War Entity Frequency by Presidents \n {startyear} - {endyear} \n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.3, "The heatmap displays the frequency of specific entities mentioned in President Speeches during the signifiacnt years of Cold War, with annotations indicating the start and end dates of of presidents in office during that time period starting with the announcement of the Truman Doctrine in 1947 and ends with Gorbachev resigning and what was left of the Soviet parliament voted to end itself. Red labels highlight Republican Presidents and Blue higlights Depmocraric Presidents.", wrap=True, horizontalalignment='center', fontsize=15)
plt.show()
# Initialize a StandardScaler
scaler = StandardScaler()
# Scale the data
fumpy_standardized = pd.DataFrame(scaler.fit_transform(fumpy_transposed), columns=fumpy_transposed.columns, index=fumpy_transposed.index)
# Set the size of the heatmap
plt.figure(figsize=(15, 7)) # You can adjust the size as needed
# Create the transposed DataFrame
fumpy_transposed = fumpy_numeric.T
# Create the heatmap without annotations first
sns.heatmap(fumpy_standardized, annot=False, cmap='Blues')
# Annotate each cell with the corresponding value
for i in range(len(fumpy_transposed.index)):
for j in range(len(fumpy_transposed.columns)):
plt.text(j + 0.5, i + 0.5, fumpy_transposed.iloc[i, j], ha='center', va='center', color='red', fontsize=17)
# Set custom x-axis tick colors
x_labels = fumpy_transposed.columns
x_colors = ['red' if label in republican_presidents_df['Name'].values else 'blue' for label in x_labels]
# Map the start and end dates for each president
date_mapping = {
row['president']: (row['first_speech'].strftime('%Y-%m-%d'), row['last_speech'].strftime('%Y-%m-%d'))
for _, row in president_dates.iterrows()
}
# Use plt.text to position the x-tick labels manually below the heatmap
y_pos_below = 9 # Adjust as needed to position labels below the heatmap
for idx, (label, color) in enumerate(zip(x_labels, x_colors)):
plt.text(idx , y_pos_below, label, ha='center', va='center', fontsize=15, color=color, rotation=40)
start_date, end_date = date_mapping[label]
plt.text(idx+ .2, y_pos_below + .2, f'{start_date} - {end_date}', ha='center', va='center', fontsize=12, color='black', rotation=40)
legend_elements = [
Patch(facecolor='blue', edgecolor='blue', label='Democrat'),
Patch(facecolor='red', edgecolor='red', label='Republican')
]
plt.legend(handles=legend_elements,
loc='lower left',
bbox_to_anchor=(-0.3, -0.2), # Coordinates for 'bbox_to_anchor' are relative to the axes
title="Fontcolor of Presidents",
fontsize='small', # Adjust text size
title_fontsize='medium', # Adjust title font size
frameon=True, # Toggle the frame
shadow=True) # Add shadow for better visibility
plt.xticks(ticks=[], labels=[]) # Remove default xticks to avoid overlap
# Set y-axis tick labels (word list)
plt.yticks(ticks=range(len(fumpy_transposed.index)), labels=fumpy_transposed.index, fontsize=20)
plt.xlabel('President', fontsize=20, labelpad=130)
plt.ylabel('Entities', fontsize=20)
plt.title(f'Cold War Entity Frequency by Presidents \n {startyear} - {endyear} Standardized \n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.3, "The heatmap displays the Standardized frequency of specific entities mentioned in President Speeches during the signifiacnt years of Cold War, with annotations indicating the start and end dates of of presidents in office during that time period starting with the announcement of the Truman Doctrine in 1947 and ends with Gorbachev resigning and what was left of the Soviet parliament voted to end itself. Red labels highlight Republican Presidents and Blue higlights Depmocraric Presidents.", wrap=True, horizontalalignment='center', fontsize=15)
plt.show()
# Initialize a MinMaxScaler
scaler = MinMaxScaler()
# Scale the data
fumpy_scaled = pd.DataFrame(scaler.fit_transform(fumpy_transposed), columns=fumpy_transposed.columns, index=fumpy_transposed.index)
# Set the size of the heatmap
plt.figure(figsize=(15, 7)) # You can adjust the size as needed
# Create the transposed DataFrame
fumpy_transposed = fumpy_numeric.T
# Create the heatmap without annotations first
sns.heatmap(fumpy_scaled, annot=False, cmap='Blues')
# Annotate each cell with the corresponding value
for i in range(len(fumpy_transposed.index)):
for j in range(len(fumpy_transposed.columns)):
plt.text(j + 0.5, i + 0.5, fumpy_transposed.iloc[i, j], ha='center', va='center', color='red', fontsize=17)
# Set custom x-axis tick colors
x_labels = fumpy_transposed.columns
x_colors = ['red' if label in republican_presidents_df['Name'].values else 'blue' for label in x_labels]
# Map the start and end dates for each president
date_mapping = {
row['president']: (row['first_speech'].strftime('%Y-%m-%d'), row['last_speech'].strftime('%Y-%m-%d'))
for _, row in president_dates.iterrows()
}
# Use plt.text to position the x-tick labels manually below the heatmap
y_pos_below = 9 # Adjust as needed to position labels below the heatmap
for idx, (label, color) in enumerate(zip(x_labels, x_colors)):
plt.text(idx , y_pos_below, label, ha='center', va='center', fontsize=15, color=color, rotation=40)
start_date, end_date = date_mapping[label]
plt.text(idx+ .2, y_pos_below + .2, f'{start_date} - {end_date}', ha='center', va='center', fontsize=12, color='black', rotation=40)
legend_elements = [
Patch(facecolor='blue', edgecolor='blue', label='Democrat'),
Patch(facecolor='red', edgecolor='red', label='Republican')
]
plt.legend(handles=legend_elements,
loc='lower left',
bbox_to_anchor=(-0.3, -0.2), # Coordinates for 'bbox_to_anchor' are relative to the axes
title="Fontcolor of Presidents",
fontsize='small', # Adjust text size
title_fontsize='medium', # Adjust title font size
frameon=True, # Toggle the frame
shadow=True) # Add shadow for better visibility
plt.xticks(ticks=[], labels=[]) # Remove default xticks to avoid overlap
# Set y-axis tick labels (word list)
plt.yticks(ticks=range(len(fumpy_transposed.index)), labels=fumpy_transposed.index, fontsize=20)
plt.xlabel('President', fontsize=20, labelpad=130)
plt.ylabel('Entities', fontsize=20)
plt.title(f'Cold War Entity Frequency by Presidents \n {startyear} - {endyear} MinMax Scaled \n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.3, "The heatmap displays the MinMax Scaled frequency of specific entities mentioned in President Speeches during the signifiacnt years of Cold War, with annotations indicating the start and end dates of of presidents in office during that time period starting with the announcement of the Truman Doctrine in 1947 and ends with Gorbachev resigning and what was left of the Soviet parliament voted to end itself. Red labels highlight Republican Presidents and Blue higlights Depmocraric Presidents.", wrap=True, horizontalalignment='center', fontsize=15)
plt.show()
In [ ]:
In [ ]:
Case Study 3 Middle Eastern Entities¶
In [248]:
samp_sorted= pd.read_csv('Data_Processing/outputfinal_filename.csv')
startyear=1949
endyear=2023
startDate=19490816 #1949, August 16. Navy establishes Middle East Force.
endDate=20230222 # Now, ealriest speach in data
gents = [
"Iraq", "the Middle East", "Afghanistan", "Iran",
"Iraqi", "Taliban", "Afghan", "al Qaeda",
"Arab", "Saddam Hussein", "Saddam Hussein's",
"Iraqis", "Somalia", "Muslim", "Al Qaeda",
"Osama bin Laden", "Afghans", "Assad", "Lebanon", "Army", "Navy", 'Nato'
]
bummy = samp_sorted.loc[(samp_sorted['Year'] >= startyear)]
bummy.reset_index(inplace=True)
bummy.drop('index', axis=1, inplace=True)
df1 = pd.DataFrame(columns=['doc_name', 'date', 'transcript', 'president', 'title', 'Year'])
prev=startDate
for i in range(len(bummy)):
date_string = bummy.loc[bummy.index[i], 'date'] # Accessing the 'date' column for each row
cleaned_string = date_string.replace('-', '')
# Convert string to number
if 'T' in cleaned_string:
cleaned_string=cleaned_string.split('T')[0]
result = int(cleaned_string)
if result<9999:
original_str = str(prev)
bluuuh = str(result)
# Cut off the last four characters
last_four = original_str[-4:]
# Combine the remaining string with the new string
res = bluuuh + last_four
result =int(res)
prev=result
fumm=str(result)
summmy=fumm[:4] + '-' + fumm[4:6] + '-' + fumm[6:]
bummy.loc[bummy.index[i], 'date'] = summmy
if startDate <= result <= endDate:
# Assuming df1 is the DataFrame you want to add a row to, and df2 is the DataFrame from which you want to add the row
row_to_add = bummy.loc[i] # Select the row you want to add from df2
df1.loc[len(df1)] = row_to_add.values
# Ensure the 'date' column is in datetime format
df1['date'] = pd.to_datetime(df1['date'])
# Create a DataFrame with each president's first and last speech date
president_dates = df1.groupby('president')['date'].agg(['min', 'max']).reset_index()
president_dates.columns = ['president', 'first_speech', 'last_speech']
# Print the new DataFrame
print(president_dates)
# Create an empty DataFrame to store the counts
fumpy = pd.DataFrame(columns=['president'] + gents)
# Iterate over each president in df1
for president in df1['president'].unique():
# Initialize a dictionary to store the counts for each president
president_counts = {'president': president}
# Filter df1 for speeches by the current president
president_df = df1[df1['president'] == president]
# Iterate over each word in gents
for word in gents:
# Count the occurrences of the word in the current president's speeches
word_count = sum(president_df['transcript'].str.count(word))
# Add the count to the dictionary
president_counts[word] = word_count
# Append the counts for the current president to fumpy
fumpy = fumpy.append(president_counts, ignore_index=True)
# Set the 'president' column as the index
fumpy.set_index('president', inplace=True)
# Calculate the sum of each column and sort the columns by the sum in descending order
fumpy = fumpy.loc[:, fumpy.sum().sort_values(ascending=False).index]
republican_presidents_df = pd.DataFrame({
"Name": [
"Donald Trump",
"Dwight D. Eisenhower",
"George H. W. Bush",
"George W. Bush",
"Gerald Ford",
"Richard M. Nixon",
"Ronald Reagan"
]
})
# Print the DataFrame fumpy
print(fumpy)
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:29: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
bummy.drop('index', axis=1, inplace=True)
president first_speech last_speech 0 Barack Obama 2008-08-28 2017-01-10 1 Bill Clinton 1993-01-20 2001-01-18 2 Donald Trump 2017-01-20 2021-01-20 3 Dwight D. Eisenhower 1953-01-20 1961-01-17 4 George H. W. Bush 1988-08-18 1993-01-05 5 George W. Bush 2001-01-20 2009-01-15 6 Gerald Ford 1974-08-09 1977-01-12 7 Harry S. Truman 1950-01-04 1953-01-15 8 Jimmy Carter 1976-09-23 1981-01-14 9 Joe Biden 2021-01-20 2023-02-22 10 John F. Kennedy 1960-07-15 1963-10-26 11 Lyndon B. Johnson 1963-05-30 1969-01-14 12 Richard M. Nixon 1952-09-23 1974-08-09 13 Ronald Reagan 1964-10-27 1989-01-11
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:109: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:109: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:109: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:109: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:109: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:109: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:109: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:109: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:109: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:109: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:109: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:109: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True)
Iraq Afghan Afghanistan the Middle East Iran Iraqi Arab \
president
Harry S. Truman 0 0 0 0 3 0 0
Richard M. Nixon 0 0 0 15 1 0 5
Dwight D. Eisenhower 3 0 0 34 5 0 6
John F. Kennedy 1 0 0 16 1 1 1
Lyndon B. Johnson 0 1 1 17 2 0 11
Ronald Reagan 6 57 52 51 38 0 27
Gerald Ford 0 0 0 13 0 0 2
Jimmy Carter 6 22 22 45 65 0 45
George H. W. Bush 115 3 3 23 6 23 42
Bill Clinton 12 0 0 26 3 1 5
George W. Bush 540 94 84 74 46 218 24
Barack Obama 179 188 121 30 71 44 25
Donald Trump 19 18 15 25 103 1 17
Joe Biden 10 228 159 3 4 0 4
Lebanon Army al Qaeda ... Saddam Hussein Iraqis Somalia \
president ...
Harry S. Truman 0 2 0 ... 0 0 0
Richard M. Nixon 0 2 0 ... 0 0 0
Dwight D. Eisenhower 2 4 0 ... 0 0 0
John F. Kennedy 2 12 0 ... 0 0 0
Lyndon B. Johnson 2 24 0 ... 0 0 0
Ronald Reagan 134 7 0 ... 0 0 0
Gerald Ford 0 2 0 ... 0 0 0
Jimmy Carter 1 5 0 ... 0 0 0
George H. W. Bush 3 9 0 ... 26 2 29
Bill Clinton 1 13 0 ... 8 0 34
George W. Bush 11 24 24 ... 55 61 1
Barack Obama 2 10 88 ... 2 13 4
Donald Trump 2 24 1 ... 0 0 2
Joe Biden 0 3 12 ... 0 0 3
Navy Al Qaeda Assad Osama bin Laden Afghans Nato \
president
Harry S. Truman 1 0 0 0 0 0
Richard M. Nixon 0 0 0 0 0 0
Dwight D. Eisenhower 7 0 0 0 0 0
John F. Kennedy 3 0 0 0 0 0
Lyndon B. Johnson 9 0 0 0 0 0
Ronald Reagan 8 0 0 0 0 0
Gerald Ford 2 0 0 0 0 0
Jimmy Carter 8 0 0 0 0 0
George H. W. Bush 3 0 0 0 0 0
Bill Clinton 2 0 0 1 0 0
George W. Bush 6 41 0 3 2 0
Barack Obama 3 4 31 17 9 14
Donald Trump 10 1 3 1 0 0
Joe Biden 0 2 0 9 19 0
Saddam Hussein's
president
Harry S. Truman 0
Richard M. Nixon 0
Dwight D. Eisenhower 0
John F. Kennedy 0
Lyndon B. Johnson 0
Ronald Reagan 0
Gerald Ford 0
Jimmy Carter 0
George H. W. Bush 6
Bill Clinton 0
George W. Bush 7
Barack Obama 0
Donald Trump 0
Joe Biden 0
[14 rows x 22 columns]
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:109: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True) /var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/2711461420.py:109: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. fumpy = fumpy.append(president_counts, ignore_index=True)
In [249]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.patches import Rectangle
# Fill missing values with zeros
fumpy_filled = fumpy.fillna(0)
# Convert fumpy to numeric type
fumpy_numeric = fumpy_filled.apply(pd.to_numeric)
# Set the size of the heatmap
plt.figure(figsize=(18, 10)) # You can adjust the size as needed
# Create the transposed DataFrame
fumpy_transposed = fumpy_numeric.T
# Create the heatmap without annotations first
sns.heatmap(fumpy_transposed, annot=False, cmap='Blues')
# Annotate each cell with the corresponding value
for i in range(len(fumpy_transposed.index)):
for j in range(len(fumpy_transposed.columns)):
plt.text(j + 0.5, i + 0.5, fumpy_transposed.iloc[i, j], ha='center', va='center', color='red', fontsize=17)
# Set custom x-axis tick colors
x_labels = fumpy_transposed.columns
x_colors = ['red' if label in republican_presidents_df['Name'].values else 'blue' for label in x_labels]
# Map the start and end dates for each president
date_mapping = {
row['president']: (row['first_speech'].strftime('%Y-%m-%d'), row['last_speech'].strftime('%Y-%m-%d'))
for _, row in president_dates.iterrows()
}
# Use plt.text to position the x-tick labels manually below the heatmap
y_pos_below = 25 # Adjust as needed to position labels below the heatmap
for idx, (label, color) in enumerate(zip(x_labels, x_colors)):
plt.text(idx-0.0, y_pos_below -0.5, label, ha='center', va='center', fontsize=16, color=color, rotation=40)
start_date, end_date = date_mapping[label]
plt.text(idx+0.1, y_pos_below -0.05, f'{start_date} - {end_date}', ha='center', va='center', fontsize=12, color='black', rotation=40)
legend_elements = [
Patch(facecolor='blue', edgecolor='blue', label='Democrat'),
Patch(facecolor='red', edgecolor='red', label='Republican')
]
plt.legend(handles=legend_elements,
loc='lower left',
bbox_to_anchor=(-0.3, -0.2), # Coordinates for 'bbox_to_anchor' are relative to the axes
title="Fontcolor of Presidents",
fontsize='small', # Adjust text size
title_fontsize='medium', # Adjust title font size
frameon=True, # Toggle the frame
shadow=True) # Add shadow for better visibility
plt.xticks(ticks=[], labels=[]) # Remove default xticks to avoid overlap
# Set y-axis tick labels (word list)
plt.yticks(ticks=range(len(fumpy_transposed.index)), labels=fumpy_transposed.index, fontsize=20)
plt.xlabel('President', fontsize=20, labelpad=135)
plt.ylabel('Entities', fontsize=15)
plt.title(f'Middle East Entity Frequency by Presidents \n {startyear} - {endyear} \n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.15, "The heatmap displays the frequency of specific entities mentioned in President Speeches during the signifiacnt years of Middle East involvement, with annotations indicating the start and end dates of of presidents in office during that time period starts with the Navy establishing Middle East Force for first time and this period is still going on to this day so ends with the earliest speech in data. Red labels highlight Republican Presidents and Blue higlights Depmocraric Presidents.", wrap=True, horizontalalignment='center', fontsize=15)
plt.show()
# Initialize a StandardScaler
scaler = StandardScaler()
# Scale the data
fumpy_standardized = pd.DataFrame(scaler.fit_transform(fumpy_transposed), columns=fumpy_transposed.columns, index=fumpy_transposed.index)
# Set the size of the heatmap
plt.figure(figsize=(18, 10)) # You can adjust the size as needed
# Create the transposed DataFrame
fumpy_transposed = fumpy_numeric.T
# Create the heatmap without annotations first
sns.heatmap(fumpy_standardized, annot=False, cmap='Blues')
# Annotate each cell with the corresponding value
for i in range(len(fumpy_transposed.index)):
for j in range(len(fumpy_transposed.columns)):
plt.text(j + 0.5, i + 0.5, fumpy_transposed.iloc[i, j], ha='center', va='center', color='red', fontsize=17)
# Set custom x-axis tick colors
x_labels = fumpy_transposed.columns
x_colors = ['red' if label in republican_presidents_df['Name'].values else 'blue' for label in x_labels]
# Map the start and end dates for each president
date_mapping = {
row['president']: (row['first_speech'].strftime('%Y-%m-%d'), row['last_speech'].strftime('%Y-%m-%d'))
for _, row in president_dates.iterrows()
}
# Use plt.text to position the x-tick labels manually below the heatmap
y_pos_below = 25 # Adjust as needed to position labels below the heatmap
for idx, (label, color) in enumerate(zip(x_labels, x_colors)):
plt.text(idx-0.0, y_pos_below -0.5, label, ha='center', va='center', fontsize=16, color=color, rotation=40)
start_date, end_date = date_mapping[label]
plt.text(idx+0.1, y_pos_below -0.05, f'{start_date} - {end_date}', ha='center', va='center', fontsize=12, color='black', rotation=40)
legend_elements = [
Patch(facecolor='blue', edgecolor='blue', label='Democrat'),
Patch(facecolor='red', edgecolor='red', label='Republican')
]
plt.legend(handles=legend_elements,
loc='lower left',
bbox_to_anchor=(-0.3, -0.2), # Coordinates for 'bbox_to_anchor' are relative to the axes
title="Fontcolor of Presidents",
fontsize='small', # Adjust text size
title_fontsize='medium', # Adjust title font size
frameon=True, # Toggle the frame
shadow=True) # Add shadow for better visibility
plt.xticks(ticks=[], labels=[]) # Remove default xticks to avoid overlap
# Set y-axis tick labels (word list)
plt.yticks(ticks=range(len(fumpy_transposed.index)), labels=fumpy_transposed.index, fontsize=20)
plt.xlabel('President', fontsize=20, labelpad=135)
plt.ylabel('Entities', fontsize=15)
plt.title(f'Middle East Entity Frequency by Presidents \n {startyear} - {endyear} Standardized\n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.15, "The heatmap displays the Standardized frequency of specific entities mentioned in President Speeches during the signifiacnt years of Middle East involvement, with annotations indicating the start and end dates of of presidents in office during that time period starts with the Navy establishing Middle East Force for first time and this period is still going on to this day so ends with the earliest speech in data. Red labels highlight Republican Presidents and Blue higlights Depmocraric Presidents.", wrap=True, horizontalalignment='center', fontsize=15)
plt.show()
# Initialize a MinMaxScaler
scaler = MinMaxScaler()
# Scale the data
fumpy_scaled = pd.DataFrame(scaler.fit_transform(fumpy_transposed), columns=fumpy_transposed.columns, index=fumpy_transposed.index)
# Set the size of the heatmap
plt.figure(figsize=(18, 10)) # You can adjust the size as needed
# Create the transposed DataFrame
fumpy_transposed = fumpy_numeric.T
# Create the heatmap without annotations first
sns.heatmap(fumpy_scaled, annot=False, cmap='Blues')
# Annotate each cell with the corresponding value
for i in range(len(fumpy_transposed.index)):
for j in range(len(fumpy_transposed.columns)):
plt.text(j + 0.5, i + 0.5, fumpy_transposed.iloc[i, j], ha='center', va='center', color='red', fontsize=17)
# Set custom x-axis tick colors
x_labels = fumpy_transposed.columns
x_colors = ['red' if label in republican_presidents_df['Name'].values else 'blue' for label in x_labels]
# Map the start and end dates for each president
date_mapping = {
row['president']: (row['first_speech'].strftime('%Y-%m-%d'), row['last_speech'].strftime('%Y-%m-%d'))
for _, row in president_dates.iterrows()
}
# Use plt.text to position the x-tick labels manually below the heatmap
y_pos_below = 25 # Adjust as needed to position labels below the heatmap
for idx, (label, color) in enumerate(zip(x_labels, x_colors)):
plt.text(idx-0.0, y_pos_below -0.5, label, ha='center', va='center', fontsize=16, color=color, rotation=40)
start_date, end_date = date_mapping[label]
plt.text(idx+0.1, y_pos_below -0.05, f'{start_date} - {end_date}', ha='center', va='center', fontsize=12, color='black', rotation=40)
legend_elements = [
Patch(facecolor='blue', edgecolor='blue', label='Democrat'),
Patch(facecolor='red', edgecolor='red', label='Republican')
]
plt.legend(handles=legend_elements,
loc='lower left',
bbox_to_anchor=(-0.3, -0.2), # Coordinates for 'bbox_to_anchor' are relative to the axes
title="Fontcolor of Presidents",
fontsize='small', # Adjust text size
title_fontsize='medium', # Adjust title font size
frameon=True, # Toggle the frame
shadow=True) # Add shadow for better visibility
plt.xticks(ticks=[], labels=[]) # Remove default xticks to avoid overlap
# Set y-axis tick labels (word list)
plt.yticks(ticks=range(len(fumpy_transposed.index)), labels=fumpy_transposed.index, fontsize=20)
plt.xlabel('President', fontsize=20, labelpad=135)
plt.ylabel('Entities', fontsize=15)
plt.title(f'Middle East Entity Frequency by Presidents \n {startyear} - {endyear} MinMax Scaled \n', fontsize=25)
# Add a caption
plt.figtext(0.4, -0.15, "The heatmap displays the MinMax Scaled frequency of specific entities mentioned in President Speeches during the signifiacnt years of Middle East involvement, with annotations indicating the start and end dates of of presidents in office during that time period starts with the Navy establishing Middle East Force for first time and this period is still going on to this day so ends with the earliest speech in data. Red labels highlight Republican Presidents and Blue higlights Depmocraric Presidents.", wrap=True, horizontalalignment='center', fontsize=15)
plt.show()
Below is just additional Data visualizations that help with understanding data but are not a a final submission and dont want to be considered for grading¶
In [265]:
import json
with open('Data_Processing/speeches.json', 'r') as file:
json_data = json.load(file)
# Convert JSON data to a string
json_string = json.dumps(json_data, indent=4) # indent is optional, for pretty-printing
# Load JSON data
with open('Data_Processing/speeches.json', 'r') as file:
data = json.load(file)
# Normalize JSON data
normalized_data = pd.json_normalize(data) # This assumes 'data' is a list of records
# Create DataFrame
df = pd.DataFrame(normalized_data)
In [266]:
samp_sorted= pd.read_csv('Data_Processing/outputfinal_filename.csv')
samp_sorted
Out[266]:
| doc_name | date | transcript | president | title | Year | |
|---|---|---|---|---|---|---|
| 0 | april-30-1789-first-inaugural-address | 1789-04-30 | Fellow Citizens of the Senate and the House of... | George Washington | April 30, 1789: First Inaugural Address | 1789 |
| 1 | october-3-1789-thanksgiving-proclamation | 1789-10-03 | Whereas it is the duty of all Nations to ackno... | George Washington | October 3, 1789: Thanksgiving Proclamation | 1789 |
| 2 | january-8-1790-first-annual-message-congress | 1790-01-08 | Fellow Citizens of the Senate and House of Rep... | George Washington | January 8, 1790: First Annual Message to Congress | 1790 |
| 3 | december-8-1790-second-annual-message-congress | 1790-12-08 | Fellow citizens of the Senate and House of Re... | George Washington | December 8, 1790: Second Annual Message to Con... | 1790 |
| 4 | december-29-1790-talk-chiefs-and-counselors-se... | 1790-12-29 | I the President of the United States, by my o... | George Washington | December 29, 1790: Talk to the Chiefs and Coun... | 1790 |
| ... | ... | ... | ... | ... | ... | ... |
| 1056 | may-24-2022-remarks-school-shooting-uvalde-texas | 2022-05-25 | Good evening, fellow Americans.\r\n\r\nI had h... | Joe Biden | May 24, 2022: Remarks on School Shooting in Uv... | 2022 |
| 1057 | september-1-2022-remarks-continued-battle-soul... | 2022-09-02 | THE PRESIDENT: My fellow Americans, please, if... | Joe Biden | September 1, 2022: Remarks on the Continued Ba... | 2022 |
| 1058 | september-21-2022-speech-77th-session-united-n... | 2022-09-21 | Thank you. \r\n\r\nMr. President, Mr. Secretar... | Joe Biden | September 21, 2022: Speech before the 77th Ses... | 2022 |
| 1059 | february-7-2023-state-union-address | 2023-02-08 | Mr. Speaker. Madam Vice President. Our First L... | Joe Biden | February 7, 2023: State of the Union Address | 2023 |
| 1060 | february-21-2023-remarks-one-year-anniversary-... | 2023-02-22 | THE PRESIDENT: Hello, Poland! One of our grea... | Joe Biden | February 21, 2023: Remarks on the One-Year Ann... | 2023 |
1061 rows × 6 columns
In [267]:
!pip install selenium
import nltk
# Download the required NLTK corpora
nltk.download('state_union')
# Now, you can safely load and use the 'state_union' corpus
from nltk.corpus import state_union
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time, os
import random
import pickle
from nltk.probability import FreqDist
from nltk.corpus import state_union
import re
import string
from nltk.stem import LancasterStemmer
nltk.download('state_union')
zetemp=samp_sorted
zetemp.sort_values(by=['Year'], inplace=True)
zetemp.head(3)
# Text preprocessing steps - remove numbers, captial letters and punctuation
alphanumeric = lambda x: re.sub('\w*\d\w*', ' ', x)
punc_lower = lambda x: re.sub('[%s]' % re.escape(string.punctuation), ' ', x.lower())
no_n = lambda x: re.sub('\n', '', x)
no_r = lambda x: re.sub('\r', '', x)
no_hyphen = lambda x: re.sub('-', ' ', x)
zetemp['transcript'] = zetemp.transcript.map(alphanumeric).map(punc_lower).map(no_n).map(no_r).map(no_hyphen)
zetemp['Split'] = zetemp.transcript.str.split()
zetemp.head(3)
stemmer = LancasterStemmer()
zetemp['Stemmed'] = zetemp['Split'].apply(lambda x: [stemmer.stem(y) for y in x])# Stem every word.
zetemp.sample(3)
zetemp.to_csv('Data_Processing/Presidents_Speeches.csv')
zetemp.to_pickle("my_Presidents.pkl")
with open('my_Presidents.pkl', 'wb') as picklefile:
pickle.dump(zetemp, picklefile)
with open("my_Presidents.pkl", 'rb') as picklefile:
my_old_df = pickle.load(picklefile)
zetemp = my_old_df
zetemp.reset_index(level = 0, inplace = True)
zetemp = df.rename(columns = {'index':'Title'})
zetemp.head(3)
Requirement already satisfied: selenium in /opt/anaconda3/lib/python3.9/site-packages (4.15.2) Requirement already satisfied: urllib3[socks]<3,>=1.26 in /opt/anaconda3/lib/python3.9/site-packages (from selenium) (1.26.7) Requirement already satisfied: trio~=0.17 in /opt/anaconda3/lib/python3.9/site-packages (from selenium) (0.23.1) Requirement already satisfied: trio-websocket~=0.9 in /opt/anaconda3/lib/python3.9/site-packages (from selenium) (0.11.1) Requirement already satisfied: certifi>=2021.10.8 in /opt/anaconda3/lib/python3.9/site-packages (from selenium) (2021.10.8) Requirement already satisfied: exceptiongroup>=1.0.0rc9 in /opt/anaconda3/lib/python3.9/site-packages (from trio~=0.17->selenium) (1.2.0) Requirement already satisfied: idna in /opt/anaconda3/lib/python3.9/site-packages (from trio~=0.17->selenium) (3.2) Requirement already satisfied: attrs>=20.1.0 in /opt/anaconda3/lib/python3.9/site-packages (from trio~=0.17->selenium) (21.2.0) Requirement already satisfied: sniffio>=1.3.0 in /opt/anaconda3/lib/python3.9/site-packages (from trio~=0.17->selenium) (1.3.0) Requirement already satisfied: outcome in /opt/anaconda3/lib/python3.9/site-packages (from trio~=0.17->selenium) (1.3.0.post0) Requirement already satisfied: sortedcontainers in /opt/anaconda3/lib/python3.9/site-packages (from trio~=0.17->selenium) (2.4.0) Requirement already satisfied: wsproto>=0.14 in /opt/anaconda3/lib/python3.9/site-packages (from trio-websocket~=0.9->selenium) (1.2.0) Requirement already satisfied: PySocks!=1.5.7,<2.0,>=1.5.6 in /opt/anaconda3/lib/python3.9/site-packages (from urllib3[socks]<3,>=1.26->selenium) (1.7.1) Requirement already satisfied: h11<1,>=0.9.0 in /opt/anaconda3/lib/python3.9/site-packages (from wsproto>=0.14->trio-websocket~=0.9->selenium) (0.14.0)
[nltk_data] Downloading package state_union to [nltk_data] /Users/brukeamare/nltk_data... [nltk_data] Package state_union is already up-to-date! [nltk_data] Downloading package state_union to [nltk_data] /Users/brukeamare/nltk_data... [nltk_data] Package state_union is already up-to-date!
Out[267]:
| doc_name | date | transcript | president | title | |
|---|---|---|---|---|---|
| 0 | april-18-1977-address-nation-energy | 1977-04-18 | Good evening.\r\nTonight I want to have an unp... | Jimmy Carter | April 18, 1977: Address to the Nation on Energy |
| 1 | april-25-1980-statement-iran-rescue-mission | 1980-04-25 | Late yesterday, I cancelled a carefully planne... | Jimmy Carter | April 25, 1980: Statement on the Iran Rescue M... |
| 2 | august-14-1980-acceptance-speech-democratic-na... | 1980-08-14 | Fellow Democrats, fellow citizens:\r\n\r\nI th... | Jimmy Carter | August 14, 1980: Acceptance Speech at the Demo... |
In [268]:
zetemp.president.value_counts()
Out[268]:
Lyndon B. Johnson 71 Ronald Reagan 60 Barack Obama 51 Franklin D. Roosevelt 49 John F. Kennedy 45 Donald Trump 43 George W. Bush 40 Bill Clinton 39 Woodrow Wilson 33 Ulysses S. Grant 32 Andrew Johnson 31 Grover Cleveland 30 Herbert Hoover 30 Andrew Jackson 26 James K. Polk 25 Thomas Jefferson 24 George H. W. Bush 23 Richard M. Nixon 23 Benjamin Harrison 23 Jimmy Carter 22 James Madison 22 Theodore Roosevelt 22 Joe Biden 21 George Washington 21 Harry S. Truman 19 Warren G. Harding 18 John Tyler 18 Rutherford B. Hayes 16 Abraham Lincoln 15 Dwight D. Eisenhower 15 Franklin Pierce 15 William McKinley 14 Gerald Ford 14 James Buchanan 14 Calvin Coolidge 12 William Taft 12 Chester A. Arthur 11 Martin Van Buren 10 James Monroe 10 John Adams 9 John Quincy Adams 9 Millard Fillmore 7 Zachary Taylor 4 James A. Garfield 1 William Harrison 1 Name: president, dtype: int64
In [269]:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
plt.figure(figsize=(8, 14))
my_colors = ['orange','orange','orange','orange','orange','orange', 'darkviolet', 'darkviolet', 'orange', 'orange','darkviolet', 'orange',
'orange', 'orange', 'darkviolet','orange', 'orange','orange', 'orange', 'orange', 'darkviolet', 'darkviolet', 'orange','darkviolet',
'orange','darkviolet', 'orange', 'orange','darkviolet', 'darkviolet','orange','darkviolet', 'orange', 'darkviolet',
'orange', 'darkviolet','darkviolet','darkviolet','darkviolet','orange', 'orange', 'darkviolet','darkviolet','darkviolet','darkviolet']
df.president.value_counts(ascending=True).plot(kind = 'barh', color=my_colors)
plt.xlabel('Number of Speeches', fontsize = 15)
plt.ylabel('Presidents', fontsize = 16)
plt.yticks(fontsize=15)
plt.xticks(fontsize=15)
plt.title('Number of Speeches per President', fontsize = 20)
plt.tight_layout()
plt.savefig('No._Pres_Speeches.pdf');
In [270]:
samp_sorted= pd.read_csv('Data_Processing/outputfinal_filename.csv')
samp_sorted
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df= pd.read_csv('Data_Processing/outputfinal_filename.csv')
df
# Count the number of words in each transcript
df['word_count'] = df['transcript'].str.split().str.len()
# Group by decade (assuming 'Year' column is the last 4 digits of the 'date' column)
df['decade'] = df['date'].str[:4].astype(int) // 10 * 10
# Increase font size and set a single color
sns.set(style='whitegrid')
plt.figure(figsize=(20, 10))
sns.boxplot(x='decade', y='word_count', data=df, color='lightblue') # Adjust the color as needed
# Customize plot appearance
plt.title('Word Count of Presidential Speeches Over Decades', fontsize=24)
plt.xlabel('Decade', fontsize=18)
plt.ylabel('Word Count', fontsize=18)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xticks(rotation=45) # Rotate x labels if needed
# Show the plot
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Messing Around with Data starting here to figure out how to represent the knowledge maps This part is for the relational entites analysis, Analysis is done but I failed in representing it properly, and nicely¶
In [250]:
charted= pd.read_csv('Data_Processing/chart_ordered.csv')
charted
Out[250]:
| entity1 | entity2 | avg_sentiment | entity_count | connect_count | |
|---|---|---|---|---|---|
| 0 | United States | Government | 0.093470 | 40135 | 1574 |
| 1 | United States | America | 0.115637 | 40135 | 969 |
| 2 | United States | President | 0.084311 | 40135 | 901 |
| 3 | United States | Congress | 0.102111 | 40135 | 714 |
| 4 | United States | Union | 0.088627 | 40135 | 467 |
| ... | ... | ... | ... | ... | ... |
| 141863 | Applied Physics | Lab | 0.200000 | 1 | 1 |
| 141864 | List | COVID-19 | 0.107143 | 1 | 1 |
| 141865 | thatHe | ’s | 0.800000 | 1 | 1 |
| 141866 | militaryHe | ’s | -0.100000 | 1 | 1 |
| 141867 | Q:—to Rick | Bright | 0.700000 | 1 | 1 |
141868 rows × 5 columns
In [251]:
months=['January', 'February', 'March', 'April', 'May', 'June',
'July', 'August', 'September', 'October', 'November', 'December']
count=0
for i in range(len(charted)):
jack=str(charted.loc[i,'entity2'])
for month in months:
if month in jack :
charted = charted.drop(i)
#count+=1
In [252]:
charted
Out[252]:
| entity1 | entity2 | avg_sentiment | entity_count | connect_count | |
|---|---|---|---|---|---|
| 0 | United States | Government | 0.093470 | 40135 | 1574 |
| 1 | United States | America | 0.115637 | 40135 | 969 |
| 2 | United States | President | 0.084311 | 40135 | 901 |
| 3 | United States | Congress | 0.102111 | 40135 | 714 |
| 4 | United States | Union | 0.088627 | 40135 | 467 |
| ... | ... | ... | ... | ... | ... |
| 141863 | Applied Physics | Lab | 0.200000 | 1 | 1 |
| 141864 | List | COVID-19 | 0.107143 | 1 | 1 |
| 141865 | thatHe | ’s | 0.800000 | 1 | 1 |
| 141866 | militaryHe | ’s | -0.100000 | 1 | 1 |
| 141867 | Q:—to Rick | Bright | 0.700000 | 1 | 1 |
138663 rows × 5 columns
In [253]:
charted.reset_index(inplace=True)
charted.drop('index', axis=1, inplace=True)
charted
Out[253]:
| entity1 | entity2 | avg_sentiment | entity_count | connect_count | |
|---|---|---|---|---|---|
| 0 | United States | Government | 0.093470 | 40135 | 1574 |
| 1 | United States | America | 0.115637 | 40135 | 969 |
| 2 | United States | President | 0.084311 | 40135 | 901 |
| 3 | United States | Congress | 0.102111 | 40135 | 714 |
| 4 | United States | Union | 0.088627 | 40135 | 467 |
| ... | ... | ... | ... | ... | ... |
| 138658 | Applied Physics | Lab | 0.200000 | 1 | 1 |
| 138659 | List | COVID-19 | 0.107143 | 1 | 1 |
| 138660 | thatHe | ’s | 0.800000 | 1 | 1 |
| 138661 | militaryHe | ’s | -0.100000 | 1 | 1 |
| 138662 | Q:—to Rick | Bright | 0.700000 | 1 | 1 |
138663 rows × 5 columns
In [254]:
charted.loc[charted['avg_sentiment']<-0.9]
Out[254]:
| entity1 | entity2 | avg_sentiment | entity_count | connect_count | |
|---|---|---|---|---|---|
| 635 | United States | Ukraine — | -1.0 | 40135 | 4 |
| 636 | United States | — Ukraine | -1.0 | 40135 | 4 |
| 13160 | Union | Korea | -1.0 | 3623 | 1 |
| 13161 | Union | U.S. | -1.0 | 3623 | 1 |
| 13162 | Union | North Korea | -1.0 | 3623 | 1 |
| ... | ... | ... | ... | ... | ... |
| 136833 | Paul II | God | -1.0 | 2 | 1 |
| 137057 | Nineteen | Nancy | -1.0 | 2 | 1 |
| 137058 | Nineteen | RonaldReagan | -1.0 | 2 | 1 |
| 137504 | Life Synagogue | Pittsburgh | -1.0 | 1 | 1 |
| 138202 | Beast | Belsen | -1.0 | 1 | 1 |
105 rows × 5 columns
In [255]:
# reove counts and connections less than 30, reomove entite\y2 which are substrings of entity1
chartoy=charted.loc[charted['entity_count']>29]
chartoy=chartoy.loc[charted['connect_count']>29]
chartoy.reset_index(inplace=True)
chartoy.drop('index', axis=1, inplace=True)
chartoy = chartoy[~chartoy.apply(lambda row: row['entity2'] in row['entity1'], axis=1)]
chartoy.reset_index(inplace=True)
chartoy.drop('index', axis=1, inplace=True)
chartoy.to_csv('Data_Processing/chart_ordy.csv', index=False)
chartoy
Out[255]:
| entity1 | entity2 | avg_sentiment | entity_count | connect_count | |
|---|---|---|---|---|---|
| 0 | United States | Government | 0.093470 | 40135 | 1574 |
| 1 | United States | America | 0.115637 | 40135 | 969 |
| 2 | United States | President | 0.084311 | 40135 | 901 |
| 3 | United States | Congress | 0.102111 | 40135 | 714 |
| 4 | United States | Union | 0.088627 | 40135 | 467 |
| ... | ... | ... | ... | ... | ... |
| 1246 | Export | Import Bank | 0.126172 | 86 | 30 |
| 1247 | Soviet Premier | Khrushchev | 0.002520 | 83 | 31 |
| 1248 | President López | Obrador | 0.190157 | 70 | 42 |
| 1249 | Choctaw | Chickasaw | 0.196028 | 70 | 35 |
| 1250 | Fifty | Congress | 0.247037 | 65 | 30 |
1251 rows × 5 columns
In [256]:
chartoy.loc[chartoy['avg_sentiment']<0]
Out[256]:
| entity1 | entity2 | avg_sentiment | entity_count | connect_count | |
|---|---|---|---|---|---|
| 50 | United States | Island | -0.016570 | 40135 | 66 |
| 72 | United States | Rhode Island | -0.044448 | 40135 | 53 |
| 96 | United States | Departments | -0.010184 | 40135 | 39 |
| 100 | United States | Minister | -0.031579 | 40135 | 38 |
| 104 | United States | Affairs | -0.034864 | 40135 | 35 |
| 122 | United States | Foreign Affairs | -0.046774 | 40135 | 31 |
| 272 | President | Island | -0.032758 | 14738 | 51 |
| 275 | President | Rhode Island | -0.045748 | 14738 | 49 |
| 278 | President | Executive | -0.028899 | 14738 | 46 |
| 331 | State | Affairs | -0.036293 | 8538 | 35 |
| 332 | State | France | -0.025873 | 8538 | 35 |
| 341 | State | Minister | -0.051667 | 8538 | 30 |
| 342 | State | Foreign Affairs | -0.051667 | 8538 | 30 |
| 365 | Secretary | France | -0.035937 | 7244 | 32 |
| 366 | Secretary | Foreign Affairs | -0.050968 | 7244 | 31 |
| 369 | Secretary | Minister | -0.017742 | 7244 | 31 |
| 475 | House | Island | -0.050035 | 3840 | 49 |
| 476 | House | Rhode Island | -0.058333 | 3840 | 48 |
| 480 | House | Executive | -0.047724 | 3840 | 34 |
| 513 | Treasury | Bank | -0.036632 | 3369 | 35 |
| 525 | Republic | Cuba | -0.035573 | 3356 | 47 |
| 718 | Executive | Island | -0.013556 | 1527 | 55 |
| 719 | Executive | Rhode Island | -0.017356 | 1527 | 54 |
| 937 | JamesBuchanan | President | -0.026490 | 885 | 34 |
| 1061 | Rhode Island | Executive | -0.049221 | 581 | 53 |
| 1062 | Rhode Island | States | -0.047956 | 581 | 35 |
| 1084 | Nam | America | -0.018788 | 550 | 33 |
| 1085 | Nam | Russia | -0.006875 | 550 | 32 |
| 1086 | Nam | India | -0.006875 | 550 | 32 |
| 1087 | Nam | China | -0.006875 | 550 | 32 |
| 1163 | Minister | France | -0.037857 | 290 | 34 |
| 1164 | Minister | Affairs | -0.028800 | 290 | 33 |
| 1165 | Minister | Foreign Affairs | -0.044330 | 290 | 32 |
| 1181 | Ukraine | Warsaw | -0.262366 | 249 | 31 |
| 1189 | Foreign Affairs | France | -0.037857 | 236 | 34 |
| 1232 | Osama bin | Laden | -0.043686 | 120 | 40 |
| 1242 | t Senator | Kennedy | -0.038372 | 91 | 44 |
In [257]:
chartoy = chartoy.drop_duplicates(subset='entity1')
chartoy.reset_index(inplace=True)
chartoy.drop('index', axis=1, inplace=True)
chartoy
/var/folders/4k/4vfxm2nd59b749g493qd5bkr0000gn/T/ipykernel_1428/3296870759.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
chartoy.drop('index', axis=1, inplace=True)
Out[257]:
| entity1 | entity2 | avg_sentiment | entity_count | connect_count | |
|---|---|---|---|---|---|
| 0 | United States | Government | 0.093470 | 40135 | 1574 |
| 1 | Congress | States | 0.101565 | 15251 | 1472 |
| 2 | Government | States | 0.101808 | 15089 | 1975 |
| 3 | President | States | 0.093720 | 14738 | 1686 |
| 4 | State | States | 0.077760 | 8538 | 1044 |
| ... | ... | ... | ... | ... | ... |
| 244 | Export | Bank | 0.120257 | 86 | 33 |
| 245 | Soviet Premier | Khrushchev | 0.002520 | 83 | 31 |
| 246 | President López | Obrador | 0.190157 | 70 | 42 |
| 247 | Choctaw | Chickasaw | 0.196028 | 70 | 35 |
| 248 | Fifty | Congress | 0.247037 | 65 | 30 |
249 rows × 5 columns
In [258]:
original_rows_to_remove = [249, 245, 236, 232, 221, 188, 177, 160, 129, 128, 118, 115, 110, 105, 90, 84, 76, 75, 69, 56, 54, 52, 48, 39, 38, 36, 31, 24]
# Adjusting to 0-based indexing by subtracting 1 from each index
rows_to_remove = [x - 1 for x in original_rows_to_remove]
# Remove the specified rows
chartoy = chartoy.drop(rows_to_remove)
# Optional: Reset index if you want a continuous index after removal
chartoy = chartoy.reset_index(drop=True)
In [259]:
chartoy
Out[259]:
| entity1 | entity2 | avg_sentiment | entity_count | connect_count | |
|---|---|---|---|---|---|
| 0 | United States | Government | 0.093470 | 40135 | 1574 |
| 1 | Congress | States | 0.101565 | 15251 | 1472 |
| 2 | Government | States | 0.101808 | 15089 | 1975 |
| 3 | President | States | 0.093720 | 14738 | 1686 |
| 4 | State | States | 0.077760 | 8538 | 1044 |
| ... | ... | ... | ... | ... | ... |
| 216 | Nancy | RonaldReagan | 0.154602 | 89 | 34 |
| 217 | Reconstruction Finance | Corporation | 0.172128 | 89 | 31 |
| 218 | Soviet Premier | Khrushchev | 0.002520 | 83 | 31 |
| 219 | President López | Obrador | 0.190157 | 70 | 42 |
| 220 | Choctaw | Chickasaw | 0.196028 | 70 | 35 |
221 rows × 5 columns
Plotting Knowdlege map of the¶
In [260]:
def ploting_map(data_new, stwing= "Knowledge Map of Entity Relationships"):
# Creating a new graph
G_new = nx.Graph()
# Extracting all unique entities
all_entities_new = set(data_new['entity1']).union(set(data_new['entity2']))
# Creating a dictionary for entity sizes
default_size = 100 # Default size
entity_sizes_new = {entity: data_new[data_new['entity1'] == entity]['entity_count'].iloc[0]
if entity in data_new['entity1'].values else default_size
for entity in all_entities_new}
# Adding nodes with their sizes, scaled 5 times bigger
for entity, size in entity_sizes_new.items():
G_new.add_node(entity, size=size * 50) # Scaling the node size by 5 times
# Adding edges with varying thickness and color based on sentiment
for _, row in data_new.iterrows():
width = abs(row['avg_sentiment']) * 10 # Scale the width
color = 'red' if row['avg_sentiment'] < 0 else 'blue'
G_new.add_edge(row['entity1'], row['entity2'], weight=width, color=color)
# Extracting sizes for nodes, scaled 5 times bigger
sizes_new_scaled = [G_new.nodes[node]['size']/200 for node in G_new.nodes]
# Extracting colors and weights for edges
edge_colors_new = nx.get_edge_attributes(G_new,'color').values()
edge_weights_new = nx.get_edge_attributes(G_new,'weight').values()
pos = nx.spring_layout(G_new, k=1.5, iterations=20)
# Drawing the graph with scaled node sizes
plt.figure(figsize=(10, 8))
nx.draw(G_new,pos, with_labels=True, node_size=sizes_new_scaled, width=list(edge_weights_new), edge_color=list(edge_colors_new), node_color='skyblue', edgecolors='black')
plt.title(stwing)
plt.show()
The following Data Visulizations are just in testing and progress, not fully developed as it is too messy to be a finalized datavisual¶
In [261]:
ploting_map(charted.loc[charted['avg_sentiment']<0][:100], "100-150 negative sentiment over .75")
In [262]:
ploting_map(charted.loc[charted['avg_sentiment']<0][:400], "First 400 negative Sent. Relation Knowledge Map")
In [263]:
ploting_map(chartoy.loc[chartoy['avg_sentiment']<0], "all negative Sent. with counts more than 30")
In [264]:
ploting_map(chartoy.loc[chartoy['avg_sentiment']>0.25], "top positive Sent. of 0ver 0.25 Relation Knowledge Map top relation for each entity")
In [ ]:
In [ ]:
In [ ]: